Species
1 setosa
2 setosa
Species
1 setosa
2 setosa
SPECIES
1 setosa
2 setosa
SPECIES
1 setosa
2 setosa
Species
1 setosa
2 setosa
Species
1 setosa
2 setosa
SPECIES
1 setosa
2 setosa
SPECIES
1 setosa
2 setosa
# A tibble: 3 × 4
name age salary department
<chr> <dbl> <dbl> <chr>
1 Alice 23 50000 Sales
2 Bob 35 60000 Marketing
3 Charlie 45 70000 IT
# A tibble: 3 × 3
old_name1 old_name2 old_name3
<dbl> <chr> <lgl>
1 1 A TRUE
2 2 B FALSE
3 3 C TRUE
# A tibble: 3 × 2
old_name new_name
<chr> <chr>
1 old_name1 new_name1
2 old_name2 new_name2
3 old_name3 new_name3
columns<-c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
iris%>%summarise_at(columns,.funs=list(mean, median)) Sepal.Length_fn1 Sepal.Width_fn1 Petal.Length_fn1 Petal.Width_fn1
1 5.843333 3.057333 3.758 1.199333
Sepal.Length_fn2 Sepal.Width_fn2 Petal.Length_fn2 Petal.Width_fn2
1 5.8 3 4.35 1.3
Sepal.Length_fn1 Sepal.Width_fn1 Petal.Length_fn1 Petal.Width_fn1
1 5.843333 3.057333 3.758 1.199333
Sepal.Length_fn2 Sepal.Width_fn2 Petal.Length_fn2 Petal.Width_fn2
1 5.8 3 4.35 1.3
columns<-c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
iris%>%summarise(across(.cols=all_of(columns),.fns=list(mean, median))) Sepal.Length_1 Sepal.Length_2 Sepal.Width_1 Sepal.Width_2 Petal.Length_1
1 5.843333 5.8 3.057333 3 3.758
Petal.Length_2 Petal.Width_1 Petal.Width_2
1 4.35 1.199333 1.3
Sepal.Length_1 Sepal.Length_2 Sepal.Width_1 Sepal.Width_2 Petal.Length_1
1 5.843333 5.8 3.057333 3 3.758
Petal.Length_2 Petal.Width_1 Petal.Width_2
1 4.35 1.199333 1.3
iris %>%
mutate(AverageLength=(Sepal.Length+Petal.Length)/2,
AverageWidth=(Sepal.Width+Petal.Width)/2)%>%
head(3) Sepal.Length Sepal.Width Petal.Length Petal.Width Species AverageLength
1 5.1 3.5 1.4 0.2 setosa 3.25
2 4.9 3.0 1.4 0.2 setosa 3.15
3 4.7 3.2 1.3 0.2 setosa 3.00
AverageWidth
1 1.85
2 1.60
3 1.70
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
# A tibble: 3 × 9
Species Sepal.Length_1 Sepal.Length_2 Sepal.Width_1 Sepal.Width_2
<fct> <dbl> <dbl> <dbl> <dbl>
1 setosa 5.01 5 3.43 3.4
2 versicolor 5.94 5.9 2.77 2.8
3 virginica 6.59 6.5 2.97 3
# ℹ 4 more variables: Petal.Length_1 <dbl>, Petal.Length_2 <dbl>,
# Petal.Width_1 <dbl>, Petal.Width_2 <dbl>
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 6.3 3.3 6.0 2.5 virginica
2 7.2 3.6 6.1 2.5 virginica
3 6.7 3.3 5.7 2.5 virginica
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 6.3 3.3 6.0 2.5 virginica
2 7.2 3.6 6.1 2.5 virginica
3 6.7 3.3 5.7 2.5 virginica
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 6.3 3.3 6.0 2.5 virginica
2 7.2 3.6 6.1 2.5 virginica
df<-iris%>%slice_sample(n=10)
df%>%mutate(id=row_number(), groups=ntile(Species,5),
minrank=min_rank(Species), denserank=dense_rank(Species))%>%
select(-contains("Petal"),-contains("Sepal"),) Species id groups minrank denserank
1 versicolor 1 1 2 2
2 versicolor 2 2 2 2
3 versicolor 3 2 2 2
4 virginica 4 4 8 3
5 setosa 5 1 1 1
6 versicolor 6 3 2 2
7 virginica 7 5 8 3
8 virginica 8 5 8 3
9 versicolor 9 3 2 2
10 versicolor 10 4 2 2
df<-iris%>%slice_sample(n=10)
df%>%group_by(Species)%>%arrange(desc(Petal.Length))%>%
mutate(id=row_number())%>%
ungroup()%>%
select(Petal.Length, Species, id)%>%arrange(Species)# A tibble: 10 × 3
Petal.Length Species id
<dbl> <fct> <int>
1 1.9 setosa 1
2 1.9 setosa 2
3 1.7 setosa 3
4 1.4 setosa 4
5 4.4 versicolor 1
6 4 versicolor 2
7 6.3 virginica 1
8 5.7 virginica 2
9 5.5 virginica 3
10 5.3 virginica 4
df %>%
pivot_longer(cols=month_aug:month_oct,names_prefix="month_",
names_to = "Month", values_to = "Temperature", values_drop_na=TRUE) %>%
head(6)# A tibble: 6 × 3
day Month Temperature
<chr> <chr> <dbl>
1 Monday aug 46
2 Monday sep 62
3 Monday oct 43
4 Tuesday aug 76
5 Tuesday sep 67
6 Wednesday aug 32
# A tibble: 3 × 4
name height_cm weight_kg class
<chr> <dbl> <dbl> <chr>
1 Alice 170 65 A
2 Bob 180 75 A
3 Charlie 175 68 B
df_long <- df %>%
pivot_longer(
cols = c(height_cm, weight_kg, class),
names_to = "measurement_type",
values_to = "value",
values_transform = list(value=as.character)
)
df_long# A tibble: 9 × 3
name measurement_type value
<chr> <chr> <chr>
1 Alice height_cm 170
2 Alice weight_kg 65
3 Alice class A
4 Bob height_cm 180
5 Bob weight_kg 75
6 Bob class A
7 Charlie height_cm 175
8 Charlie weight_kg 68
9 Charlie class B
using in mutate
function has to be vectorized; if not, use rowwise() to go row by row
vectorized function
# A tibble: 3 × 7
# Rowwise:
Ozone Solar.R Wind Temp Month Day something
<int> <int> <dbl> <int> <int> <int> <int>
1 41 190 7.4 67 5 1 6
2 36 118 8 72 5 2 7
3 12 149 12.6 74 5 3 8
airquality%>%mutate(id=row_number())%>%group_by(id)%>%
mutate(something=stupidFunction(Month, Day))%>%ungroup()%>%head(3)# A tibble: 3 × 8
Ozone Solar.R Wind Temp Month Day id something
<int> <int> <dbl> <int> <int> <int> <int> <int>
1 41 190 7.4 67 5 1 1 6
2 36 118 8 72 5 2 2 7
3 12 149 12.6 74 5 3 3 8
# A tibble: 3 × 3
name band plays
<chr> <chr> <chr>
1 John Beatles guitar
2 Paul Beatles bass
3 Keith <NA> guitar
# A tibble: 4 × 3
name band plays
<chr> <chr> <chr>
1 Mick Stones <NA>
2 John Beatles guitar
3 Paul Beatles bass
4 Keith <NA> guitar
r<-data.frame(year=2005:2014,population=sample(14000:15000, 10, replace=T))
r<-cbind(r,lag(r$population))
names(r)<-c("year","pop","pop1")
r%>%
mutate(index=round(100*(1+(pop-pop1)/pop1),2))%>%
select(year,pop,pop1,index) year pop pop1 index
1 2005 14138 NA NA
2 2006 14760 14138 104.40
3 2007 14801 14760 100.28
4 2008 14433 14801 97.51
5 2009 14749 14433 102.19
6 2010 14209 14749 96.34
7 2011 14694 14209 103.41
8 2012 14049 14694 95.61
9 2013 14141 14049 100.65
10 2014 14214 14141 100.52
df<-as.data.frame(Titanic)%>%select(Class, Sex, Age, Survived)%>%
dplyr::filter(Class!="Crew")
dfIfElse<-df%>%
mutate(Comment=ifelse(Age=="Child" & Survived=="No","No Child should die!",
ifelse(Age=="Adult" & Sex=="Male" & Survived=="Yes",
"He should help women and children!","No comment")))%>%
slice_sample(n=10)
dfIfElse Class Sex Age Survived Comment
1 1st Female Adult Yes No comment
2 3rd Male Child Yes No comment
3 3rd Male Adult No No comment
4 3rd Female Adult Yes No comment
5 2nd Male Child No No Child should die!
6 1st Female Child No No Child should die!
7 1st Male Adult Yes He should help women and children!
8 2nd Female Child No No Child should die!
9 2nd Male Child Yes No comment
10 3rd Female Child Yes No comment
df%>%
mutate(Comment=case_when(Age=="Child" & Survived=="No"~"No Child should die!",
Age=="Adult" & Sex=="Male" & Survived=="Yes"~"He should help women and children!",
TRUE~"No comment"))%>%
slice_sample(n=10) Class Sex Age Survived Comment
1 3rd Female Child Yes No comment
2 1st Male Adult Yes He should help women and children!
3 2nd Female Child No No Child should die!
4 1st Female Adult Yes No comment
5 2nd Female Adult Yes No comment
6 2nd Female Adult No No comment
7 2nd Male Child No No Child should die!
8 3rd Female Adult No No comment
9 1st Female Child No No Child should die!
10 1st Female Adult No No comment
Hair Eye Sex Freq
1 Black Brown Male 32
2 Brown Brown Male 53
3 Red Brown Male 10
4 Blond Brown Male 3
df<-data%>%unite("Properties",Hair:Sex, sep="/")
df%>%separate(Properties, into=c("A","B","C"),sep="/")%>%
head(3) A B C Freq
1 Black Brown Male 32
2 Brown Brown Male 53
3 Red Brown Male 10
First_name Last_Name
1 Sam Jones
2 Lady Gaga
3 Valentino Rossi
# A tibble: 3 × 2
Species data
<fct> <list>
1 setosa <tibble [50 × 4]>
2 versicolor <tibble [50 × 4]>
3 virginica <tibble [50 × 4]>
1. Take airmiles dataset and transform dataframe to a column with name Airmiles.
Add a new column called Year from 1937 to 1960.
Calculate index for Airmiles. Which year has the highest index.
2. Take dates_df dataframe and split date column into columns month, day and year!
dates_df <- data.frame(date = c("5/24/1930",
"5/25/1930",
"5/26/1930",
"5/27/1930",
"5/28/1930"),
stringsAsFactors = FALSE)
3. Create 100 random numbers between 1 and 95 and transform it to a column Age in a dataframe
People.
Discretize Age into the following age groups:
- 1:20
- 21:45
- 46:55
- 56:70
- 71:
Calculate summary statistics.
airquality%>%select(Month, Day)%>%mutate(Something=map2_int(Month,Day,sum))%>%
head(3)%>%as_tibble()# A tibble: 3 × 3
Month Day Something
<int> <int> <int>
1 5 1 6
2 5 2 7
3 5 3 8
airquality%>%select(Month, Day)%>%mutate(Something=map2_chr(Month,Day,sum))%>%
head(3)%>%as_tibble()# A tibble: 3 × 3
Month Day Something
<int> <int> <chr>
1 5 1 6
2 5 2 7
3 5 3 8
airquality%>%select(Wind,Temp, Month, Day)%>%
mutate(Something=pmap_dbl(list(Wind,Temp,Month,Day),sum))%>%
head(3)%>%as_tibble()# A tibble: 3 × 5
Wind Temp Month Day Something
<dbl> <int> <int> <int> <dbl>
1 7.4 67 5 1 80.4
2 8 72 5 2 87
3 12.6 74 5 3 94.6
airquality%>%select(Wind,Temp, Month, Day)%>%
mutate(Something=pmap_chr(list(Wind,Temp,Month,Day),sum))%>%
head(3)%>%as_tibble()# A tibble: 3 × 5
Wind Temp Month Day Something
<dbl> <int> <int> <int> <chr>
1 7.4 67 5 1 80.400000
2 8 72 5 2 87.000000
3 12.6 74 5 3 94.600000
airquality%>%select(Ozone,Day)%>%dplyr::filter(is.na(Ozone))%>%
mutate(Something=map2_int(Ozone,Day,max))%>%
head(3)%>%as_tibble()# A tibble: 3 × 3
Ozone Day Something
<int> <int> <int>
1 NA 5 NA
2 NA 10 NA
3 NA 25 NA
airquality%>%select(Ozone,Day)%>%dplyr::filter(is.na(Ozone))%>%
mutate(Something=map2_int(Ozone,Day,max, na.rm=TRUE))%>%
head(3)%>%as_tibble()# A tibble: 3 × 3
Ozone Day Something
<int> <int> <int>
1 NA 5 5
2 NA 10 10
3 NA 25 25
call1<-mtcars%>%select(cyl,mpg,wt)%>%
nest(-cyl)%>%mutate(model=map(data,function(x) lm(formula=mpg~wt,data=x)))
call2<-mtcars%>%select(cyl,mpg,wt)%>%
nest(-cyl)%>%mutate(model=map(data,~lm(formula=mpg~wt,data=.x)))
call1# A tibble: 3 × 3
cyl data model
<dbl> <list> <list>
1 6 <tibble [7 × 2]> <lm>
2 4 <tibble [11 × 2]> <lm>
3 8 <tibble [14 × 2]> <lm>
# A tibble: 6 × 8
cyl data model term estimate std.error statistic p.value
<dbl> <list> <list> <chr> <dbl> <dbl> <dbl> <dbl>
1 6 <tibble [7 × 2]> <lm> (Intercep… 28.4 4.18 6.79 1.05e-3
2 6 <tibble [7 × 2]> <lm> wt -2.78 1.33 -2.08 9.18e-2
3 4 <tibble [11 × 2]> <lm> (Intercep… 39.6 4.35 9.10 7.77e-6
4 4 <tibble [11 × 2]> <lm> wt -5.65 1.85 -3.05 1.37e-2
5 8 <tibble [14 × 2]> <lm> (Intercep… 23.9 3.01 7.94 4.05e-6
6 8 <tibble [14 × 2]> <lm> wt -2.19 0.739 -2.97 1.18e-2
df<-sp500 %>%
dplyr::filter(between(date,ymd("2015-12-24"),ymd("2015-12-31"))) %>%
select(-adj_close) %>% arrange(date)
df %>% gt()| date | open | high | low | close | volume |
|---|---|---|---|---|---|
| 2015-12-24 | 2063.52 | 2067.36 | 2058.73 | 2060.99 | 1411860000 |
| 2015-12-28 | 2057.77 | 2057.77 | 2044.20 | 2056.50 | 2492510000 |
| 2015-12-29 | 2060.54 | 2081.56 | 2060.54 | 2078.36 | 2542000000 |
| 2015-12-30 | 2077.34 | 2077.34 | 2061.97 | 2063.36 | 2367430000 |
| 2015-12-31 | 2060.59 | 2062.54 | 2043.62 | 2043.94 | 2655330000 |
dfGT<-df %>% gt() %>%
tab_header(title="SP500",
subtitle="Last week of 2015") %>%
fmt_date(columns = vars(date),date_style = 7) %>%
fmt_currency(columns = vars(open, high, low, close),
currency = "EUR")
dfGT| SP500 | |||||
| Last week of 2015 | |||||
| date | open | high | low | close | volume |
|---|---|---|---|---|---|
| 24 Dec 2015 | €2,063.52 | €2,067.36 | €2,058.73 | €2,060.99 | 1411860000 |
| 28 Dec 2015 | €2,057.77 | €2,057.77 | €2,044.20 | €2,056.50 | 2492510000 |
| 29 Dec 2015 | €2,060.54 | €2,081.56 | €2,060.54 | €2,078.36 | 2542000000 |
| 30 Dec 2015 | €2,077.34 | €2,077.34 | €2,061.97 | €2,063.36 | 2367430000 |
| 31 Dec 2015 | €2,060.59 | €2,062.54 | €2,043.62 | €2,043.94 | 2655330000 |
dfgt <- sp500 %>%
mutate(year=year(date),wday=wday(date, label=TRUE, abbr=FALSE, locale = "english"))%>%
select(year, wday, high, low) %>%
group_by(year, wday) %>%
summarise(high=mean(high), low=mean(low)) %>%
dplyr::filter(year %in% c(2014,2015)) %>%
ungroup()
dfgt# A tibble: 10 × 4
year wday high low
<dbl> <ord> <dbl> <dbl>
1 2014 Monday 1941. 1925.
2 2014 Tuesday 1940. 1924.
3 2014 Wednesday 1941. 1925.
4 2014 Thursday 1933. 1914.
5 2014 Friday 1939. 1923.
6 2015 Monday 2072. 2050.
7 2015 Tuesday 2070. 2047.
8 2015 Wednesday 2072. 2049.
9 2015 Thursday 2073. 2051.
10 2015 Friday 2072. 2050.
| wday | high | low | |
|---|---|---|---|
| 2014 | Monday | €1,941.05 | €1,924.95 |
| 2014 | Tuesday | €1,940.11 | €1,924.20 |
| 2014 | Wednesday | €1,940.99 | €1,925.14 |
| 2014 | Thursday | €1,932.90 | €1,913.93 |
| 2014 | Friday | €1,939.08 | €1,923.17 |
| 2015 | Monday | €2,071.82 | €2,049.77 |
| 2015 | Tuesday | €2,070.48 | €2,047.26 |
| 2015 | Wednesday | €2,071.74 | €2,048.60 |
| 2015 | Thursday | €2,073.29 | €2,051.18 |
| 2015 | Friday | €2,072.02 | €2,049.98 |
dfgtL<-dfgt %>% gt(groupname_col = "year") %>% fmt_currency(columns=vars(high,low), currency = "EUR")
dfgtL| wday | high | low |
|---|---|---|
| 2014 | ||
| Monday | €1,941.05 | €1,924.95 |
| Tuesday | €1,940.11 | €1,924.20 |
| Wednesday | €1,940.99 | €1,925.14 |
| Thursday | €1,932.90 | €1,913.93 |
| Friday | €1,939.08 | €1,923.17 |
| 2015 | ||
| Monday | €2,071.82 | €2,049.77 |
| Tuesday | €2,070.48 | €2,047.26 |
| Wednesday | €2,071.74 | €2,048.60 |
| Thursday | €2,073.29 | €2,051.18 |
| Friday | €2,072.02 | €2,049.98 |
dfgt %>% dplyr::filter(wday %in% c("Monday","Tuesday")) %>%
gt(groupname_col = "year") %>% fmt_currency(columns=vars(high,low),
currency = "EUR") %>%
summary_rows(
columns = vars(high, low),
fns = list(average = "mean"))| wday | high | low | |
|---|---|---|---|
| 2014 | |||
| Monday | €1,941.05 | €1,924.95 | |
| Tuesday | €1,940.11 | €1,924.20 | |
| mean | — | 1940.579 | 1924.574 |
| 2015 | |||
| Monday | €2,071.82 | €2,049.77 | |
| Tuesday | €2,070.48 | €2,047.26 | |
| mean | — | 2071.150 | 2048.514 |
dfgt %>% dplyr::filter(wday %in% c("Monday","Tuesday")) %>%
gt(groupname_col = "year", rowname_col ="wday") %>%
fmt_currency(columns=vars(high,low),currency = "EUR") %>%
summary_rows(
groups = TRUE,
columns = vars(high, low),
fns = list(
avg = ~mean(., na.rm = TRUE),
total = ~sum(., na.rm = TRUE),
s.d. = ~sd(., na.rm = TRUE)
)
)| high | low | |
|---|---|---|
| 2014 | ||
| Monday | €1,941.05 | €1,924.95 |
| Tuesday | €1,940.11 | €1,924.20 |
| 2015 | ||
| Monday | €2,071.82 | €2,049.77 |
| Tuesday | €2,070.48 | €2,047.26 |
uros.godnov@gmail.com